In [1]:
%pylab inline
from numpy.lib.recfunctions import *
In [3]:
def filternan(a):
return a[isnan(a) != True]
In [15]:
segments = genfromtxt("segments.csv", delimiter=',', names=True)
names = genfromtxt("names.csv", delimiter=',', names=True)
callsigns = genfromtxt("callsigns.csv", delimiter=',', names=True)
imos = genfromtxt("imos.csv", delimiter=',', names=True)
fishing = genfromtxt("../Combinedfishing_2014.csv", delimiter=',', names=True)
fishing = append_fields(fishing, 'fishing', [], dtypes='<f4')
fishing['fishing'] = 1.0
tileset = genfromtxt("pipeline_205_08_07_13_33_31_mmsi_list.csv", delimiter=',', names=True)
tileset = append_fields(tileset, 'tileset', [], dtypes='<f4')
tileset['tileset'] = 1.0
d = join_by('mmsi', segments, names, jointype='outer', asrecarray=True)
d = join_by('mmsi', d, imos, jointype='outer', asrecarray=True)
d = join_by('mmsi', d, callsigns, jointype='outer', asrecarray=True)
d = join_by('mmsi', d, fishing, jointype='outer', asrecarray=True)
d = join_by('mmsi', d, tileset, jointype='outer', asrecarray=True)
d = d.filled(nan)
savez("mmsi_counts.npz", d)
In [30]:
d = load("mmsi_counts.npz")['arr_0']
In [31]:
f = d[d['fishing'] == 1]
histfig = figure(figsize=(20,5))
hist(filternan(f['seg_count']), bins=50, normed=False, color='b', alpha=0.5, log=True, label="segment count")
legend(); show()
histfig = figure(figsize=(20,5))
hist(filternan(f['radius']), bins=200, normed=False, color='r', alpha=0.5, log=True, label="radius")
legend(); show()
histfig = figure(figsize=(20,5))
hist(filternan(f['name_count']), bins=200, normed=False, color='b', alpha=0.5, log=True, label="name count")
legend(); show()
histfig = figure(figsize=(20,5))
hist(filternan(f['imo_count']), bins=200, normed=False, color='b', alpha=0.5, log=True, label="imo count")
legend(); show()
histfig = figure(figsize=(20,5))
hist(filternan(f['callsign_count']), bins=200, normed=False, color='b', alpha=0.5, log=True, label="callsign count")
legend(); show()
In [69]:
def compare(col1, col2):
col1_coord = unique(filternan(d[col1]))
col1_coord.sort()
col2_coord = unique(filternan(d[col2]))
col2_coord.sort()
all_counts = zeros((col1_coord.shape[0], col2_coord.shape[0]))
for i1, c1 in enumerate(col1_coord):
for i2, c2 in enumerate(col2_coord):
v = d[d[col1] > c1]
v = v[v[col2] > c2]
all_counts[i1,i2] = v.shape[0]
return col1_coord, col2_coord, all_counts
In [87]:
c1, c2, p = compare('seg_count', 'name_count')
In [86]:
x = log(p)
imshow(x); show()
imshow(x[:20,:20]); show()
imshow(x[:10,:10]); show()
In [48]:
f = d[d['tileset'] == 1]
print "#### 0.2"
print f[f['seg_count'] == 2.0][200]['mmsi']
print f[f['seg_count'] == 2.0][201]['mmsi']
print f[f['seg_count'] == 2.0][202]['mmsi']
print f[f['seg_count'] == 2.0][203]['mmsi']
print "#### 0.5"
print f[f['seg_count'] == 5.0][200]['mmsi']
print f[f['seg_count'] == 5.0][201]['mmsi']
print f[f['seg_count'] == 5.0][202]['mmsi']
print f[f['seg_count'] == 5.0][203]['mmsi']
0.2
0.5
In [49]:
f = d[d['tileset'] == 1]
print "#### 0.2"
print f[f['name_count'] == 2.0][200]['mmsi']
print f[f['name_count'] == 2.0][201]['mmsi']
print f[f['name_count'] == 2.0][202]['mmsi']
print f[f['name_count'] == 2.0][203]['mmsi']
print "#### 0.5"
print f[f['name_count'] == 5.0][200]['mmsi']
print f[f['name_count'] == 5.0][201]['mmsi']
print f[f['name_count'] == 5.0][202]['mmsi']
print f[f['name_count'] == 5.0][203]['mmsi']
In [ ]: